This notebook contains code to replicate analyses reported in CHI submission ID:5584 .

In VISUALIZATION VIBES project Study 2, participants completed an attitutde eliciation survey, asking questions about their attitude toward (5) stimulus images (data visualizations). Each participant was randomly assigned to one of 6 stimulus blocks, each containing 1 image from each of (4) pseudo-categories (ranging from most abstract to most figural). Each participant started by responding to questions for a single ‘common’ stimulus (that is thus super-powered as it was seen by all participants). Two participant recruitment pools were used: Prolific, with a smaller set of participants recruited from Tumblr (to replicate and compare survey results to Study 1 interviews with participants sourced from Tumblr).

This notebook contains code to replicate quantitative analysis of data from Study 2 reported in the CHI submission. Note that due to limited space, we were unable to report results for all stimulus blocks, and all possible analyses. A separate set of R notebooks are included in the supplementary materials that document analysis of the other blocks not reported here.

This notebook includes (3) primary analyses:

  1. Analysis of Stimulus Block 2: Here we offer a complete description of data collected from participants who were assigned to Stimulus Block #2 (the block shown in the paper teaser image). This analysis includes: (descriptive statistics) characterizing the distribution of responses for each question for each stimulus, analysis of confidence scores along with response consistency across participants. note that this block is representative of the patterns of variance between individuals and between stimuli that are seen in the other 5 stimulus blocks [see supplemental materials].

  2. Analysis of Stimulus B0-0 “Millennial Pink Plants”: He we offer an analysis that includes data from all participants, because before being assigned to one of the six stimulus blocks, each participant completed the set of questions to the single ‘common’ image ID B0-0 which our research team refers to as “Millennial Pink Plants” (based on how Study 1 interviewees characterized the image).

  3. TODO DESCRIPTION OF FULL STIM SET MODELLING.

1 SETUP

We start by importing data files previously wrangled in 0_VIBES_S2_wrangling.Rmd.

############## IMPORT REFERENCE FILES
ref_stimuli <- readRDS("data/input/REFERENCE/ref_stimuli.rds")
ref_surveys <- readRDS("data/input/REFERENCE/ref_surveys.rds")
############## IMPORT DATA FILES
# df_data <- readRDS("data/output/df_data.rds") #1 row per participant — WIDE
df_participants <- readRDS("data/output/df_participants.rds") #1 row per participant — demographic
df_questions <- readRDS("data/output/df_questions.rds") #1 row per question — LONG
df_sd_questions_wide <- readRDS("data/output/df_sd_questions_wide.rds") # only sd questions WIDE
df_sd_questions_long <- readRDS("data/output/df_sd_questions_long.rds") # only sd questions LONG
df_tools <- readRDS("data/output/df_tools.rds") #multiselect format for tools Question
df_actions <- readRDS("data/output/df_actions.rds") # multiselect format for action Question
# df_graphs_full <- readRDS("data/output/df_graphs_full.rds") #includes free response data
df_graphs <- readRDS("data/output/df_graphs.rds") #only categorical and numeric questions
############## SETUP Graph Labels
ref_stim_id <- levels(ref_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
               "MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
               "CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
          "altruistic","does NOT share","untrustworthy",
          "NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
           "selfish", "DOES share", "trustworthy",
           "very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
# ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
ref_blocks <- c(1,2,3,4,5,6)
rm(left,right)
############## SETUP Colour Palettes
#https://www.r-bloggers.com/2022/06/custom-colour-palettes-for-ggplot2/

## list of color pallettes
my_colors = list(
  politics = c("#184aff","#5238bf", "#4f4a52" ,"#84649c", "#ff0000"),
  blackred = c("black","red"),
  greens = c("#ADC69D","#81A06D","#567E39","#2D5D16","#193E0A"),
  smallgreens = c("#ADC69D","#567E39","#193E0A"),
  olives = c("#CDCEA1","#B8B979","#A0A054","#78783F","#50502A","#35351C"),
  lightblues = c("#96C5D2","#61A2B2","#3C8093","#2C6378","#1F4A64"),
  darkblues = c("#7AAFE1","#3787D2","#2A73B7","#225E96","#1A4974","#133453"),
  reds = c("#D9B8BD","#CE98A2","#B17380","#954E5F","#78263E","#62151F"),
  traffic = c("#CE98A2","#81A06D","yellow")
)



## function for using palettes
my_palettes = function(name, n, all_palettes = my_colors, type = c("discrete","continuous"), direction = c("1","-1")) {
  palette = all_palettes[[name]]
  if (missing(n)) {
    n = length(palette)
  }
  type = match.arg(type)
  out = switch(type,
               continuous = grDevices::colorRampPalette(palette)(n),
               discrete = palette[1:n]
  )
  out = switch(direction,
               "1" = out,
               "-1" = palette[n:1])
  structure(out, name = name, class = "palette")
}
############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (df, left, right, x, y, color) {

  # g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
  g <- ggplot(df, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
    
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}


############## RETURNS SINGLE SD 
## LOOP STYLE
single_sd <- function (df, left, right, x) {

  g <- ggplot(df, aes(y = {{x}}, x = ""))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}



######## RETURNS SINGLE SD
##  APPLY STYLE
# plot_sd = function (data, column, type, split, boxplot) {
# 
#   ggplot(df, aes(y = .data[[column]], x="")) +
#     {if(boxplot) geom_boxplot(width = 0.5) } +
#     geom_jitter(width = 0.1, alpha=0.3, {if(split) aes(color=Distribution)}) +
#     {if(split) facet_grid(Distribution ~ .)} +
#     scale_y_continuous(limits=c(-1,101)) +
#     labs(x="", y="") +
#     coord_flip()  +
#     {if(type == "S")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[column,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
#       )} +
#     {if(type == "Q")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[q,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
#       )} +
#   theme_minimal()  +
#      labs (
#        caption = column
#      )
# }

######## RETURNS SINGLE SD
##  APPLY STYLE
plot_sd = function (data, column, type, facet, facet_by, boxplot) {

  ggplot(df, aes(y = .data[[column]], x="")) +
    {if(boxplot) geom_boxplot(width = 0.5) } +
    geom_jitter(width = 0.1, alpha=0.3, {if(facet) aes(color=.data[[facet_by]])}) +
    {if(facet) facet_grid(.data[[facet_by]] ~ .)} +
    scale_y_continuous(limits=c(-1,101)) +
    labs(x="", y="") +
    coord_flip()  +
    {if(type == "S")
      guides(
        y = guide_axis_manual(labels = ref_labels[column,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
      )} +
    {if(type == "Q")
      guides(
        y = guide_axis_manual(labels = ref_labels[q,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
      )} +
  theme_minimal()  +
     labs (
       caption = column
     ) + easy_remove_legend()
}

2 DESCRIBE SAMPLE

2.1 Sample Demographics

df <- df_participants

## FOR DESCRIPTIVES PARAGRAPH
# #PROLIFIC
df.p <- df %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)

# #TUMBLR
df.t <- df %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)

For study 2, a total of 318 participants were recruited from US-located English speaking users of TUMBLR (n = 78) and PROLIFIC (n = 240).

240 individuals from PROLIFIC participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).

78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other). Note that a higher proportion of participants recruited from TUMBLR report identities other than cis-gender Female and cis-gender Male.

2.2 Study Response Time

df <- df_participants

## for descriptives paragraph
p.desc.duration <- psych::describe(df %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))

PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.

TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.

rm(df, df.p, df.t, p.desc.duration, t.desc.duration, desc.gender.p, desc.gender.t, p_participants, t_participants)

3 FULL SAMPLE ANALYSIS

As we argue in our manuscript, we understand that an individual’s response to a visualization (both inferences about data, as well as any other behaviours) will vary based on properties of: (1) the visualization, (2) the data, (3) the individual, and (4) the situational context. Thus, our survey is not designed to uncover consistencies in behaviour, but rather, explore the nature of variance in behaviour as a function of the individual and visualization. For this reason, we do not expect to see any systematic relationships between survey variables.

3.1 SAMPLE

#full stimulus-level data
df_full <- df_graphs %>% 
  mutate(
    STUDY = "" #dummy variable for univariate visualizations
  )
# %>%
#   mutate(MAKER_ID = fct_rev(MAKER_ID))

(n = 318 ) survey respondents answered questions about some subset of the stimuli, (common stimulus B0-0 and 4 additional images defined as a block), yielding (o = 1590) stimulus-level observations.

3.1.1 CONFIDENCE IN ATTRIBUTIONS

When asking participants to identify the type, age and gender of the maker of a visualization, we also asked participants to indicate their confidence in these choices.

Across all participants and all stimuli, are these questions answered with the same degree of confidence? Here we examine both the central tendency (mean) and shape of the distribution for each confidence variable.

df <- df_full %>% select(Distribution, MAKER_CONF, AGE_CONF, GENDER_CONF, TOOL_CONF) %>% 
  pivot_longer(
    cols = c(MAKER_CONF, AGE_CONF, GENDER_CONF, TOOL_CONF),
    names_to = "QUESTION",
    values_to = "CONFIDENCE"
  )


## B
## CONFIDENCE ACROSS QUESTIONS (all stimuli, all Pps)
## BOXPLOT W/ JITTER
df %>% ggplot(aes(x=QUESTION, y= CONFIDENCE)) + 
  geom_boxplot(width = 0.5) + 
  geom_jitter(alpha = 0.25, position=position_dodge2(width = 0.25)) + 
  stat_summary(fun=mean, geom="text", colour="blue",  fontface = "bold", 
               vjust=+0.5, hjust = -1.25, aes( label=round(..y.., digits=0)))+
  stat_summary(fun=mean, geom="point", size = 4, colour="blue")+
  theme_minimal() + 
  labs(title = "Confidence by Survey Question", caption = "(mean in blue)")

df %>% 
  ggplot(aes(x=CONFIDENCE, y=QUESTION)) + 
  geom_density_ridges(scale = 0.75, quantile_lines = TRUE, alpha = 0.75) + 
  scale_x_continuous(limits = c(0,100))+
  theme_minimal() + 
  labs(title = "Confidence by Survey Question")
## Picking joint bandwidth of 4.54

3.1.2 MAKER ID

Participants were asked:

Who do you think is most likely responsible for having this image created?
options: [business or corporation / journalist or news outlet / educational or academic institution / government or political organization / other organization / an individual] (select one)
The response is stored as MAKER_ID

Participants were also asked: Please rate your confidence in this choice. The response is stored as MAKER_CONF .

#FILTER DATASET
df <- df_full


## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
S <-   ggbarstats( data = dx, x = MAKER_ID, y = STUDY,
                   legend.title = "MAKER ID") + 
    scale_fill_manual(values = my_palettes(name="reds", direction = "1")) +
    theme_minimal() +
    labs( title = "",  x = "", y="") + 
    theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################


  
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <- df %>% 
  group_by(MAKER_ID) %>% 
  mutate(count = n(), m = mean(MAKER_CONF)) %>% 
  ggplot(aes(y = MAKER_CONF, x = fct_rev(MAKER_ID), color = fct_rev(MAKER_ID))) + 
   geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_ID))) + 
  # geom_boxplot(width = 0.6)+
  # geom_jitter(position=position_jitterdodge(width ), alpha = 0.2) + 
  stat_summary(fun=mean, geom="text", colour="blue",  fontface = "bold", 
               vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+

  scale_color_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) + 
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA,
                        aes(fill = fct_rev(MAKER_ID)) , color="black",  point_interval = "mean_qi")  +
  scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) + 
    stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
    geom_text(aes(label= paste0("n=",count) ,  y = 5), color = "black",
            size = 3, nudge_x=0.35) + 
  labs(y="Maker ID Confidence", x="") + 
  theme_minimal() + 
  easy_remove_legend()+
  coord_flip() 
##############################
  

(p <- (S + H)) + plot_annotation(
  title = "Maker ID and Confidence",
  # subtitle = "the categories of MAKER ID were chosen in similar proportion, 
  # and both the mean (in blue) and shape of distribution of confidence scores is similar across values of Maker ID",
  caption = "(blue indicates mean)"
)

3.1.3 MAKER AGE

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the imageWhat generation are they most likely from?
options: [boomers (60+ years old) / Generation X (44-59 years old) / Millennials (28-43 years old) / Generation Z (12 - 27 years old] (select one)
The response was saved as MAKER_AGE

Participants were asked: Please rate your confidence in this choice. The response is stored as AGE_CONF .

#FILTER DATASET
df <- df_full


## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_AGE = fct_rev(MAKER_AGE) )
S <-   ggbarstats( data = dx, x = MAKER_AGE, y = STUDY,
                   legend.title = "MAKER AGE") + 
    scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +
    theme_minimal() +
    labs( title = "",  x = "", y="") + 
    theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################


  
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <-  df %>% 
  group_by(MAKER_AGE) %>% 
  mutate(count = n(), m = mean(MAKER_CONF)) %>% 
  ggplot(aes(y = MAKER_CONF, x = fct_rev(MAKER_AGE), color = fct_rev(MAKER_AGE))) + 
    geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_AGE))) + 
    stat_summary(fun=mean, geom="text", colour="blue",  fontface = "bold", 
               vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
    scale_color_manual(values = my_palettes(name="lightblues", direction = "-1"), 
                       guide = guide_legend(reverse = TRUE)) + 
    scale_fill_manual(values = my_palettes(name="lightblues", direction = "-1"), 
                      guide = guide_legend(reverse = TRUE)) + 
    stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, 
                        aes(fill = fct_rev(MAKER_AGE)) , color="black",  point_interval = "mean_qi")  +
    stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
    geom_text(aes(label= paste0("n=",count) ,  y = 5), color = "black",
            size = 3, nudge_x=0.35) + 
  labs(y="Maker Age Confidence", x="") + 
  theme_minimal() + 
  easy_remove_legend()+
  coord_flip() 
##############################
  

(p <- (S + H)) + plot_annotation(
  title = "Maker AGE and Confidence",
  # subtitle = "The value
  # distribution of confidence scores is similar across values of Maker AGE",
  caption = "(blue indicates mean)"
)

3.1.4 MAKER GENDER

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the imageWhat gender do they most likely identify with?
options: [female / male / other ] (select one)
Responses were stored as MAKER_GENDER.

Participants were asked: Please rate your confidence in this choice. The response is stored as GENDER_CONF .

#FILTER DATASET
df <- df_full


## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_AGE = fct_rev(MAKER_AGE) )
S <-   ggbarstats( data = dx, x = MAKER_GENDER, y = STUDY,
                   legend.title = "MAKER GENDER") + 
    scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +
    theme_minimal() +
    labs( title = "",  x = "", y="") + 
    theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <-  df %>% 
  group_by(MAKER_GENDER) %>% 
  mutate(count = n(), m = mean(GENDER_CONF)) %>% 
  ggplot(aes(y = GENDER_CONF, x = fct_rev(MAKER_GENDER), color = fct_rev(MAKER_GENDER))) + 
    geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_GENDER))) + 
    stat_summary(fun=mean, geom="text", colour="blue",  fontface = "bold", 
               vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
    scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"), 
                       guide = guide_legend(reverse = TRUE)) + 
    scale_fill_manual(values = my_palettes(name="smallgreens", direction = "-1"), 
                      guide = guide_legend(reverse = TRUE)) + 
    stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, 
                        aes(fill = fct_rev(MAKER_GENDER)) , color="black",  point_interval = "mean_qi")  +
    stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
    geom_text(aes(label= paste0("n=",count) ,  y = 5), color = "black",
            size = 3, nudge_x=0.35) + 
  labs(y="Maker Gender Confidence", x="") + 
  theme_minimal() + 
  easy_remove_legend()+
  coord_flip() 
##############################
  

(p <- (S + H)) + plot_annotation(
  title = "Maker GENDER and Confidence",
  # subtitle = "The value
  # distribution of confidence scores is similar across values of Maker AGE",
  caption = "(blue indicates mean)"
)

3.1.5 TOOL ID

Participants were asked: What tools do you think were most likely used to create this image?
options: [basic graphic design software (e.g. Canva, or similar) / advanced graphic design software (e.g. Adobe Illustrator, Figma, or similar) / data visualization software (e.g. Tableau, PowerBI, or similar)/ general purpose software (e.g. MS Word/Excel, Google Sheets, or similar) / programming language (e.g. R, python, javascript, or similar) ] (select all that apply)
The response was saved as variable TOOL_ID (multi-select)

Participants were asked: Please rate your confidence in this choice. The response is stored as TOOL_CONF .

#FILTER DATASET
df <- df_tools %>% 
  mutate(
    STUDY = ""
  )


## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
S <-   ggbarstats( data = df, x = TOOL_ID, y = STUDY,
                   legend.title = "TOOL ID") + 
    scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
    theme_minimal() +
    labs( title = "",  x = "", y="") + 
    theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################




## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <-  df %>% 
  group_by(TOOL_ID) %>% 
  mutate(count = n(), m = mean(TOOL_CONF)) %>% 
  ggplot(aes(y = TOOL_CONF, x = fct_rev(TOOL_ID), color = fct_rev(TOOL_ID))) + 
    geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(TOOL_ID))) + 
    stat_summary(fun=mean, geom="text", colour="blue",  fontface = "bold", 
               vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
    scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
    scale_color_paletteer_d("awtools::a_palette", direction = 1)+
    stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, 
                        aes(fill = fct_rev(TOOL_ID)) , color="black",  point_interval = "mean_qi")  +
    stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
    geom_text(aes(label= paste0("n=",count) ,  y = 5), color = "black",
            size = 3, nudge_x=0.35) + 
  labs(y="Tool ID Confidence", x="") + 
  theme_minimal() + 
  easy_remove_legend()+
  coord_flip() 
##############################
  

(p <- (S + H)) + plot_annotation(
  title = "TOOL ID and Confidence",
  # subtitle = "The value
  # distribution of confidence scores is similar across values of Maker AGE",
  caption = "(blue indicates mean)"
)

rm(df, df_full, p, S,H)

4 BLOCK 2 ANALYSIS

In the interest of space, for the manuscript we report analysis of one stimulus block (stimulus block == 2; pictured in the teaser image), rather than analysis of each of the six blocks. Each block contains for images, one of each “abstraction category” ranging from [1] most abstract (at left) to [4] most figural (at right). Within each block we selected visualizations such that there was diversity in: (1) chart type (2) source (i.e. where the graphic actually came from), and (3) what we as authors perceived to be aesthetic “vibe”, (i.e) a diversity of what we subjectively judged to be sophisticated and unsophisticated, aesthetically pleasing, not pleasing, etc. The goal in structuring stimulus blocks was to offer each participant a set of visualizations that varied in multiple ways.

BLOCK 2 STIMULI
B2-1 B2-2 B2-3 B2-4
“Bluesy Economist Stacked Area” “Significant Excel Error Bars” “Slick Desert Heat Map” “Holmesian Hospital Beds”

4.1 SAMPLE

#block 2 participant-level data 
df_b2_p <- df_participants %>% filter(Assigned.Block ==2)

#block 2 stimulus-level data 
df_b2 <- df_graphs %>% 
  filter(str_detect(STIMULUS,"B2"))
# %>%
#   mutate(MAKER_ID = fct_rev(MAKER_ID))

(n = 52 ) survey respondents answered questions about the (4) stimuli in block 2, yielding (o = 208) observations.

4.2 MAKER QUESTIONS

4.2.1 MAKER ID

Participants were asked:

Who do you think is most likely responsible for having this image created?
options: [business or corporation / journalist or news outlet / educational or academic institution / government or political organization / other organization / an individual] (select one)
The response is stored as MAKER_ID

4.2.1.1 Maker ID by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER IDENTIFICATION BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_ID)) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER ID by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="reds", direction = "1"), name = "",  guide = guide_legend(reverse = FALSE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  theme_minimal()
##############################



## B
## MAKER IDENTIFICATION BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
B <-
  ggbarstats( data = dx, 
            x = MAKER_ID, y = STIMULUS,
            legend.title = "MAKER ID",
            results.subtitle = FALSE) + 
    scale_fill_manual(values = my_palettes(name="reds", direction = "1"))+
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER ID BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER ID REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_ID %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_ID, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_ID
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color = "white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  labs(title = "MAKER ID by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER ID",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in maker identifications is more likely a function of participant or stimulus , we visualize the MAKER_ID data via an alluvial diagram. Here we see that there are no stable identifications (ie. paths through each stimulus with the same category) indicating that no participants had a strong preference for a particular identification category. Rather, participants seem to be identifying the maker category as a function of some features of the stimuli, rather than a trait/state inference about makers of all stimuli in general.

4.2.1.2 Maker ID Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as MAKER_CONF .

## DESCRIBE MAKER CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) 

desc.maker_conf <- psych::describe(df$MAKER_CONF)

## H 
## Distribution of MAKER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = MAKER_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker ID Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.maker_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker identifications] ranging from1 to 100 , with a mean of 64.7. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).

4.2.1.3 Maker ID Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(MAKER_CONF), 
         max = max(MAKER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(MAKER_CONF), 
         max = max(MAKER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(STIMULUS,MAKER_CONF)) + 
  geom_boxplot(width = 0.3)+ 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) + 
  labs(
    title = "Confidence in Maker ID",
    y = "MAKER ID CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.maker_conf$min, " to ", desc.maker_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################


# MAKER_CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER ID CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker identification differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( 31 of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale, with the exception of two subjects near 50%. The mean confidence score for each stimulus (blue) is greater than 50%, and no participants indicated consistently low confidence across all stimuli, indicating that in general, participants were able to respond thoughtfully to the maker identification question.

4.2.1.4 Maker ID & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER_CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF, color = MAKER_ID)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="reds", direction = "1")) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker ID Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_ID), y = MAKER_CONF, color = (MAKER_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="reds", direction = "1"), guide = guide_legend(reverse = FALSE)) +   
  labs (x = "MAKER ID") + 
  easy_add_legend_title("MAKER ID ")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER ID & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.maker_conf, p_lowvar)

Inspecting Maker ID Confidence by Maker ID category, there does not appear to be any systematic relationship between the category of Maker a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(ID) = ORGANIZATION , for example, are made with greater or less confidence than identifications of MAKER(ID) = INDIVIDUAL. Statistical tests of independence (between confidence and maker id) are ill-advised because the number of observations of each maker ID vary greatly between stimuli.

Taken together, this analysis of correspondence between MAKER_ID and MAKER_CONF suggests that both the inference identifying the kind of maker as well as the participant’s confidence in that inference are likely conditioned on some features of the stimulus rather than more stable state/trait(s) of the participant.

4.2.2 MAKER AGE

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the imageWhat generation are they most likely from?
options: [boomers (60+ years old) / Generation X (44-59 years old) / Millennials (28-43 years old) / Generation Z (12 - 27 years old] (select one)
The response was saved as MAKER_AGE

4.2.2.1 Maker AGE by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER AGE BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_AGE)) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER AGE by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +   
    #scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()
##############################


## B
## MAKER AGE BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_AGE = fct_rev(MAKER_AGE))
B <- ggbarstats( data = dx, 
            x = MAKER_AGE, y = STIMULUS,
            legend.title = "MAKER AGE",
            results.subtitle = FALSE)  +
    scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +   
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER AGE BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER AGE REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_AGE %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_AGE, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_AGE
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="lightblues", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  
  labs(title = "MAKER AGE by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER AGE",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in maker age attributions are more likely a function of participant or stimulus , we visualize the MAKER_AGE data via an alluvial diagram. Here we see that (unlike MAKER_ID) there are a number of stable identifications (ie. paths through each stimulus with the same category) indicating that some participants consistently attributed all stimuli to a particular age category. This occured most often with the ‘Millenial’ and ‘GenX’ age categories. However, this pattern may be confounded by both the true base rate of maker age (inferred based on working age of likely makers).

4.2.2.2 Maker AGE Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as AGE_CONF .

## DESCRIBE MAKER AGE CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) 

desc.age_conf <- psych::describe(df$AGE_CONF)

## H 
## Distribution of AGE_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = AGE_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker AGE Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.age_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker age attribution] ranging from8 to 100 , with a mean of 62.1. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).

4.2.2.3 Maker AGE Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(AGE_CONF), 
         max = max(AGE_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(AGE_CONF), 
         max = max(AGE_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = AGE_CONF)) + 
  scale_y_continuous(limits=c(0,100))+
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1"))+
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in Maker AGE",
    y = "MAKER AGE CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.age_conf$min, " to ", desc.age_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_AGE CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = AGE_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER AGE CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker age differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale, with the exception of (2) participants who have low variance scores clustered around 50% and (2) participants clustered around 25%. The mean confidence score for each stimulus (blue) is greater than 50%, and no participants indicated consistently low confidence across all stimuli, indicating that in general, participants were able to respond thoughtfully to the maker age question.

4.2.2.4 Maker AGE & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER AGE CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = AGE_CONF, color = MAKER_AGE)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="lightblues", direction = "1")) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker Age Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_AGE), y = AGE_CONF, color = (MAKER_AGE))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="lightblues", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  labs (x = "MAKER AGE") + 
  easy_add_legend_title("MAKER AGE")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER AGE & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

Inspecting Maker AGE Confidence by Maker AGE category, there does not appear to be any systematic relationship between the category of Maker AGE a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(AGE) = MILLENIAL , for example, are made with greater or less confidence than identifications of MAKER(AGE) = BOOMER. Statistical tests of independence (between confidence and maker age) are ill-advised because the number of observations of each maker age vary greatly between stimuli.

Taken together, this analysis of correspondence between MAKER AGE and MAKER AGE CONFIDENCE suggests that both the inference identifying the age generation of maker (and the participant’s confidence in that inference) are conditioned on some features of the stimulus rather than more stable state/trait(s) of the participant.

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.age_conf, p_lowvar)

4.2.3 MAKER GENDER

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the imageWhat gender do they most likely identify with?
options: [female / male / other ] (select one)
Responses were stored as MAKER_GENDER.

4.2.3.1 Maker GENDER by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER GENDER BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = fct_rev(MAKER_GENDER))) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER GENDER by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +   
    #scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()
##############################


## B
## MAKER GENDER BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_GENDER = (MAKER_GENDER))
B <- ggbarstats( data = dx, 
            x = MAKER_GENDER, y = STIMULUS,
            legend.title = "MAKER GENDER",
            results.subtitle = FALSE)  +
    scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +   
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER GENDER BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER GENDER REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- df_b2$MAKER_GENDER %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_GENDER, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_GENDER
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  
  labs(title = "MAKER GENDER by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER GENDER",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in MAKER_GENDER inferences are more likely a function of participant or stimulus , we visualize the MAKER_GENDER data via an alluvial diagram. Here we see that (unlike MAKER_ID) there are a number of stable identifications (ie. paths through each stimulus with the same category) indicating that some participants consistently attributed all stimuli to a particular gender category. It is difficult to determine whether this pattern of results indicates individuals are making stable assumptions about the gender of makers independetly of stimuli (i.e. a state/trait of the participant) or alternatively, if participants are responding to the features of the stimulus and the consistency of responses across stimuli is an artifact of the limited number of gender categories. To further explore inferences about maker gender we will attend to stimuli that show large variance in distribution across gender category levels (e.g. B2-1 (vs) B2-3), especially stimuli that have a larger proportion of ‘FEMALE’ identifications.

4.2.3.2 Maker GENDER by PARTICIPANT GENDER

df <- df_b2

## MAKER GENDER BY STIMULUS AND PARTICIPANT GENDER
## FACETED BAR CHART 
ggplot( df, aes( x = (STIMULUS), fill = fct_rev(MAKER_GENDER))) +
  geom_bar(position = "stack") +
  facet_grid( . ~ D_gender_collapsed, scales = "free", space = "free") + 
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1"),name = "MAKER GENDER") + 
  labs( title = "MAKER GENDER by STIMULUS and PARTICIPANT GENDER",
        subtitle = "Facet indicates participant gender", x = "STIMULUS") +
  theme_minimal()

##############################

Note that the same pattern of attribution (stimuli B2-1,2 and 4 primarily male, with B2-3 a higher proportion of females), is consistent across the participant’s own gender identities. It is NOT the case that female participants attribute most stimuli to women, while male participants attribute most stimuli to men. Rather, these results indicate that participants (regardless of their own gender identity) are identifying a maker’s gender based on some features of the stimulus in combination with state/trait-based assumptions about maker gender identities in general. More data are needed to address this question more thoroughly.

4.2.3.3 Maker GENDER Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as GENDER_CONF .

## DESCRIBE MAKER GENDER CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) 

desc.gender_conf <- psych::describe(df$GENDER_CONF)

## H 
## Distribution of GENDER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = GENDER_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker GENDER Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.gender_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker gender attribution] ranging from0 to 100 , with a mean of 54. According to (one-sample) student’s t-tests, the mean of the distribution is NOT significantly different (at the 0.01 alpha level) from both reference values of 50% (indicating partial confidence). Relative to MAKER ID and MAKER AGE, participants expressed less confidence in their attribution of MAKER GENDER

4.2.3.4 Maker GENDER Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(GENDER_CONF), 
         max = max(GENDER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(GENDER_CONF), 
         max = max(GENDER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER GENDER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = GENDER_CONF)) + 
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in Maker GENDER",
    y = "MAKER GENDER CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.gender_conf$min, " to ", desc.gender_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_GENDER CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER GENDER CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker GENDER differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are not clustered near the high end of the confidence scale as with confidence in maker ID and maker AGE … rather, low variance is maker GENDER confidence seems to be evenly distributed across the confidence scale. The mean confidence score for each stimulus (blue) are near 50%.

Unlike MAKER ID and MAKER AGE, confidence in MAKER GENDER is generally more idiosyncratic, and potentially reflective of an individual participant’s state/trait inference about maker GENDER in general, rather than responding to particular features of each stimuli. To verify, we will look for stimuli that have strong patterns of difference in relative proportion of each level of the MAKER GENDER variable.

4.2.3.5 Maker GENDER & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER GENDER CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF, color = MAKER_GENDER)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker Gender Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_GENDER), y = GENDER_CONF, color = (MAKER_GENDER))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  labs (x = "MAKER GENDER") + 
  easy_add_legend_title("MAKER GENDER")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER GENDER & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

Inspecting MAKER GENDER Confidence by MAKER GENDER category, there does not appear to be any systematic relationship between the category of Maker GENDER a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(GENDER) = MALE , for example, are made with greater or less confidence than identifications of MAKER(GENDER) = FEMALE. Statistical tests of independence (between confidence and maker age) are ill-advised because the number of observations of each maker age vary greatly between stimuli.

Taken together, this lack of variance in GENDER_CONF within-subjects and between stimuli, along with the limited number of categorical choices for the MAKER GENDER variable indicates that the inference identifying the gender generation of the maker (and the participant’s confidence in that inference) may infact be conditioned on some relatively stable state/trait(s) of the individual participant, rather than features of the stimulus. To verify this intuition we will look for stimuli with highly variant distributions of maker gender categories. (e.g. largely female or other,rather than male)

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.gender_conf, p_lowvar)

4.3 TOOL QUESTIONS

4.3.1 TOOL CHOICE

Participants were asked: What tools do you think were most likely used to create this image?
options: [basic graphic design software (e.g. Canva, or similar) / advanced graphic design software (e.g. Adobe Illustrator, Figma, or similar) / data visualization software (e.g. Tableau, PowerBI, or similar)/ general purpose software (e.g. MS Word/Excel, Google Sheets, or similar) / programming language (e.g. R, python, javascript, or similar) ] (select all that apply)
The response was saved as variable TOOL_ID (multi-select)

4.3.1.1 Tool ID by Stimulus

#FILTER DATASET
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))

## A
## TOOL ID BY STIMULUS AND SAMPLE
## FACETED BAR CHART 
A <- ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
  geom_bar(position = "stack") +
  # coord_flip() + 
  facet_grid(Distribution ~ ., scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  # scale_y_continuous(labels = scales::percent) + 
  labs( title = "TOOL ID by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("TOOL_ID") +
  theme_minimal() 
##############################

## B
## TOOL ID  BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
# dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
B <-ggbarstats( data = df, 
            x = TOOL_ID, y = STIMULUS,
            legend.title = "TOOL ID",
            results.subtitle = FALSE) + 
    scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
    labs( title = "TOOL ID BY STIMULUS ", subtitle = "", x = "",
          caption = "note: different n across stimuli indicate answers were multiple-select")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

A

B

##note: can't do sankey on tool_id because it is a multi-select field, so we can't track flow for one person through each node [stimulus]

4.3.1.2 Tool ID Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as TOOL_CONF .

## DESCRIBE TOOL CONFIDENCE
df <- df_tools %>% 
  filter(str_detect(STIMULUS,"B2")) %>% 
  select(PID, STIMULUS, TOOL_CONF) %>% 
  summarize( .by=c(PID,STIMULUS),
    TOOL_CONF = mean(TOOL_CONF) #this is a dummy just to collapse b/c the values will all be the same
  )

desc.tool_conf <- psych::describe(df$TOOL_CONF)

## H 
## Distribution of MAKER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = TOOL_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Tool ID Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.tool_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their tool ID identification] ranging from0 to 100 , with a mean of 71.4. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty). Relative to MAKER_ID, MAKER_AGE AND MAKER_GENDER, participants were more confident in their identification of the tools used to create a particular image.

4.3.1.3 Tool ID Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_tools %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, TOOL_ID, TOOL_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(TOOL_CONF), 
         max = max(TOOL_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_tools %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, TOOL_ID, TOOL_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(TOOL_CONF), 
         max = max(TOOL_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER GENDER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = TOOL_CONF)) + 
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in TOOL",
    y = "TOOL CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.tool_conf$min, " to ", desc.tool_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_GENDER CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER GENDER CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in TOOL_ID differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale (like the pattern of confidence in MAKER_ID). Across all stimuli, TOOL_ID confidence scores are higher than confidence for the maker identifications. The mean confidence score for each stimulus (blue) are all greater than 50%.

While there are some insubstantial number of participants who have low variance in tool id confidence, the range of confidence scores is smaller than those for other identifications. This makes it difficult to determine if participants have true high confidence in their choices based on each stimulus, or have some stable confidence across all stimuli.

4.3.1.4 Tool ID & Confidence by Stimulus

# FILTER DATAFRAME
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))

######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = (STIMULUS), fill = fct_rev(TOOL_ID))) + 
  geom_bar(position = "stack", width = 0.8) +
  scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") +
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = (STIMULUS), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "STIMULUS") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL_CONFIDENCE by CHOICE
C <- ggplot(df, aes( x = (TOOL_ID), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "TOOL_ID") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

(p <- (a / b / C )  + plot_annotation(
  title = 'TOOL ID & CONFIDENCE',
  subtitle = '',
  caption = ''))

###################################################

Inspecting TOOL_ID Confidence by TOOL_ID category, there does not appear to be any systematic relationship between the category of TOOL_ID a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of TOOL(ID) = DESIGN_ADVANCED , for example, are made with greater or less confidence than identifications of TOOL(ID) = VIZ_BASIC. There are relatively few instances where participants chose the ‘?’ or “unknown” option.

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.tool_conf, p_lowvar)

4.4 BEHAVIOUR QUESTIONS

4.4.1 ENCOUNTER CHOICE

Participants were asked:

As you’re scrolling through your [social media] feed, you see this image. What would you do?
options: [keep scrolling / pause, and look at image] (select one)
The answer was stored as ENCOUNTER.

4.4.1.1 Encounter choice by Stimulus

#FILTER DATASET
df <- df_b2 

## A
## ENCOUNTER  BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = fct_rev(ENCOUNTER))) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "ENCOUNTER by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="traffic", direction = "1"), name = "",  guide = guide_legend(reverse = FALSE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  theme_minimal()
##############################



## B
## ENCOUNTER  BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
B <-
  ggbarstats( data = df, 
            x = ENCOUNTER, y = STIMULUS,
            legend.title = "ENCOUNTER",
            results.subtitle = FALSE) + 
    scale_fill_manual(values = my_palettes(name="traffic", direction = "1"))+
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "ENCOUNTER BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER ID REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- df %>% select(ENCOUNTER) #%>% mutate(ENCOUNTER = fct_rev(ENCOUNTER)) 
order <- order$ENCOUNTER %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, ENCOUNTER, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = ENCOUNTER
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 


#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = fct_rev(node), 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color = "white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="traffic", direction = "1"), guide = guide_legend(reverse = FALSE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  labs(title = "ENCOUNTER by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "ENCOUNTER",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, order, A,B,S)

4.4.2 ACTION CHOICE

The final question participants were asked to answer after responding to all previous questions was:

Now that you have encountered this image while scrolling through your social media feed, which of the following are you most likely to do? (select all that apply)
options: [post a comment / share-repost / share-repost WITH comment / look up more information about the topic or source / unfollow or block the source / NOTHING ] (select all)
The answer was stored as CHART_ACTION.

4.4.2.1 Actions Choice by Stimulus

#FILTER DATASET
df <- df_actions %>% filter( str_detect(STIMULUS, "B2"))



## A
# ACTION CHOICE BY STIMULUS
## FACETED BAR CHART 
A <- ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
  geom_bar(position = "stack") +
  # coord_flip() + 
  facet_grid(Distribution ~ ., scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  # scale_y_continuous(labels = scales::percent) + 
  labs( title = "Chart Action by Stimulus",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 
##############################


## B
## TOOL ID  BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( CHART_ACTION = fct_rev(CHART_ACTION) )
 B <-ggbarstats( data = dx, 
            x = CHART_ACTION, y = STIMULUS,
            legend.title = "CHART ACTION",
            results.subtitle = FALSE) + 
    scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
    labs( title = "CHART ACTION BY STIMULUS ", subtitle = "", x = "",
          caption = "note: different n across stimuli indicate answers were multiple-select")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

A

B

##note: can't do sankey on tool_id because it is a multi-select field, so we can't track flow for one person through each node [stimulus]

4.5 SEMANTIC DIFFERENTIAL QUESTIONS

4.5.1 ALL Q BY STIMULUS

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){


#################### ALL QUESTIONS AT STIMULUS #####################
# ONE PLOT FOR EACH STIMULUS WITH ALL QUESTIONS

#set stimuli to be graphed 
# stimuli <- ref_stimuli #created in wrangling block ##TODO IF NOT WORK ref_stim_id
# stimuli <- c("B0-0","B2-1" ,"B2-2", "B2-3", "B2-4")
#DEFINE BLOCK 2 STIMULI
stimuli <- c("B2-1" ,"B2-2", "B2-3", "B2-4")


box_stimuli <- list()
rain_stimuli <- list()
i = 0

for (s in stimuli){
  i = i+1
  
  # setup dataframe 
  title <- ref_stimuli %>% filter(ID == s) %>% select(NAME)  ##TODO IF NOT WORK ref_stim_id
  title <- paste(s,"|",title)
  df <- df_graphs %>% filter(STIMULUS== s)

  
  #### BOXPLOT PLOT
  #subset data cols 
  cols <- df %>% select( all_of(ref_sd_questions))
  plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", facet = FALSE, boxplot=TRUE))

  #aggregate q plots into one for stimulus 
  x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
   plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] + 
   plot_annotation(
     title = title,
     subtitle =""
   )
  
  box_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_box.png"), units = c("in"), width = 10, height = 14  )
  #### END BOXPLOT PLOT  
  
  # #### RAINCLOUD PLOT
  # setup dataframe
  title <- ref_stimuli %>% filter(ID == s) %>% select(NAME) ##TODO IF NOT WORK ref_stim_id
  title <- paste(s,"|",title)
  
  # df <- df_sd_questions_wide %>% select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s))
  df <- df_sd_questions_wide %>% select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s)) %>% filter(!is.na(value))

  
  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value)) +
                      # fill = fct_rev(QUESTION))) + #remove color fill on raincloud questions
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    # y = guide_axis_manual(labels = levels(df$QUESTION), ref_labels$left)),
    y = guide_axis_manual(labels = rev(ref_labels$left), title = ""),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) +
  cowplot::draw_text(text = ref_sd_questions, x = 90, y= ref_sd_questions,size = 10, vjust=-2) + 
  labs (title = title, y = "") +
  theme_minimal()  
  

  rain_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_rain.png"), units = c("in"), width = 10, height = 14  )
  # #### END RAINCLOUD PLOT  
  
  
}

#GIVE NAMES TO LIST
#ACCESS plots by rain_stimuli$`B1-1`
#ACCESS plots by rain_stimuli$[[1]]
names(box_stimuli) <- stimuli ## box stimuli aren't working
names(rain_stimuli) <- stimuli
rm(x, i, plots)


#############################################################################
} 
rain_stimuli
## $`B2-1`

## 
## $`B2-2`

## 
## $`B2-3`

## 
## $`B2-4`

4.5.2 DONTUS BY STIMULUS

#################### ALL QUESTIONS AT STIMULUS #####################
# ONE PLOT FOR EACH STIMULUS WITH ALL QUESTIONS

#set stimuli to be graphed 
# stimuli <- ref_stimuli #created in wrangling block ##TODO IF NOT WORK ref_stim_id
# stimuli <- c("B0-0","B2-1" ,"B2-2", "B2-3", "B2-4")
#DEFINE BLOCK 2 STIMULI
stimuli <- c("B2-1" ,"B2-2", "B2-3", "B2-4")
cols = ref_cat_questions
palettes = c("reds","blues","greens")
donut_stimuli <- list()
i = 0


for (s in stimuli){
  i = i+1
  a = 0
  
  
  for (c in cols){
  a = a+1
  
  # setup title 
  title <- ref_stimuli %>% filter(ID == s) %>% select(NAME)  ##TODO IF NOT WORK ref_stim_id
  title <- paste(s,"|",title)
  
  df <- df_questions %>% filter(STIMULUS ==s) %>% filter(QUESTION==c)
  
  print(paste0(title, " | ", c))
  
  ##ONLY SAVE IF RENDER GRAPHS IF SET TO TRUE
  if(graph_render == TRUE){
  # CREATE A CHART TO SACE  
  PieChart(value, data = df, pdf_file = paste0("figs/donuts_by_stimulus/",s," ",c),
         fill = palettes[a], quiet=TRUE,
         main = paste0(s, " ",c)) + theme_minimal()
  }
  
  # CREATE A CHART TO DISPLAY 
  ## because lessR is DUMB and can't do both at the same time
  PieChart(value, data = df, 
         fill = palettes[a], quiet=FALSE,
         main = paste0(s, " ",c)) + theme_minimal()
 
  }
}
## [1] "B2-1 | Bluesy Economist Stacked Area | MAKER_ID"
## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                business  education   news  organization  political    Total 
## Frequencies:         17         19     11             1          4       52 
## Proportions:      0.327      0.365  0.212         0.019      0.077    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 23.769, df = 4, p-value = 0.000 
## 
## 
## [1] "B2-1 | Bluesy Economist Stacked Area | MAKER_AGE"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                boomer  gen-x  gen-z  millennial    Total 
## Frequencies:        1     31      3          17       52 
## Proportions:    0.019  0.596  0.058       0.327    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 44.923, df = 3, p-value = 0.000 
## 
## 
## [1] "B2-1 | Bluesy Economist Stacked Area | MAKER_GENDER"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                Female   Male  Other    Total 
## Frequencies:        6     42      4       52 
## Proportions:    0.115  0.808  0.077    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 52.769, df = 2, p-value = 0.000 
## 
## 
## [1] "B2-2 | Significant Excel Error Bars | MAKER_ID"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                business  education  individual   news  political    Total 
## Frequencies:         14         18          12      3          5       52 
## Proportions:      0.269      0.346       0.231  0.058      0.096    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 15.115, df = 4, p-value = 0.004 
## 
## 
## [1] "B2-2 | Significant Excel Error Bars | MAKER_AGE"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                boomer  gen-x  gen-z  millennial    Total 
## Frequencies:       15     18      7          12       52 
## Proportions:    0.288  0.346  0.135       0.231    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 5.077, df = 3, p-value = 0.166 
## 
## 
## [1] "B2-2 | Significant Excel Error Bars | MAKER_GENDER"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                Female   Male  Other    Total 
## Frequencies:       10     37      5       52 
## Proportions:    0.192  0.712  0.096    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 34.192, df = 2, p-value = 0.000 
## 
## 
## [1] "B2-3 | Slick Desert HeatMap | MAKER_ID"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##        value Count   Prop 
## ------------------------- 
##     business    5   0.096 
##    education    8   0.154 
##   individual    1   0.019 
##         news   27   0.519 
## organization    3   0.058 
##    political    8   0.154 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 50.923, df = 5, p-value = 0.000 
## 
## 
## [1] "B2-3 | Slick Desert HeatMap | MAKER_AGE"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                boomer  gen-x  gen-z  millennial    Total 
## Frequencies:        1     14      4          33       52 
## Proportions:    0.019  0.269  0.077       0.635    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 48.154, df = 3, p-value = 0.000 
## 
## 
## [1] "B2-3 | Slick Desert HeatMap | MAKER_GENDER"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                Female   Male  Other    Total 
## Frequencies:       21     27      4       52 
## Proportions:    0.404  0.519  0.077    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 16.423, df = 2, p-value = 0.000 
## 
## 
## [1] "B2-4 | Holmesian Hospital Beds | MAKER_ID"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##        value Count   Prop 
## ------------------------- 
##     business    4   0.077 
##    education    7   0.135 
##   individual    2   0.038 
##         news   20   0.385 
## organization    3   0.058 
##    political   16   0.308 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 32.692, df = 5, p-value = 0.000 
## 
## 
## [1] "B2-4 | Holmesian Hospital Beds | MAKER_AGE"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                boomer  gen-x  gen-z  millennial    Total 
## Frequencies:       14     23      2          13       52 
## Proportions:    0.269  0.442  0.038       0.250    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 17.077, df = 3, p-value = 0.001 
## 
## 
## [1] "B2-4 | Holmesian Hospital Beds | MAKER_GENDER"

## >>> suggestions
## PieChart(value, hole=0)  # traditional pie chart
## PieChart(value, values="%")  # display %'s on the chart
## PieChart(value)  # bar chart
## Plot(value)  # bubble plot
## Plot(value, values="count")  # lollipop plot 
## 
## --- value --- 
## 
##                Female   Male  Other    Total 
## Frequencies:        9     41      2       52 
## Proportions:    0.173  0.788  0.038    1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 49.885, df = 2, p-value = 0.000
#############################################################################

5 TODO B0 SUPERPOWER ANALYSIS

5.1 TODO MAKER

5.2 TODO TOOL

5.3 TODO BEHAVIOR

5.4 TODO SEMANTIC DIFFERENTIALS

5.5 TODO DATA MODELLING

6 TODO LIMITED MODELLING

6.1 FULL STIMULUS SET

6.1.1 What predicts TRUST?

design competence? data competence? chart intent? maker id?

6.1.2 What predicts BEAUTY?

6.1.3 What predicts ACTION?

6.1.4 Does MAKER ID predict MAKER POLITICS?

6.1.5 Does MAKER ID predict CHART INTENT?

6.1.6 Does MAKER AGE TOOL ID

6.1.7 Does MAKER GENDER anything?

  • highlight maker_gender = female points in raincloud SD plots

6.1.8 DOES TOOL ID predict DATA (VS) DESIGN competence

6.1.9 DOES CHART(TYPE) MAKER(DATA COMPETENCE)

6.1.10 DOES MAKER ID=individual imply TOOL(ID)=[simple]

6.1.11 Worth doing any stimulus abstraction category level analysis?

6.1.11.1 PREDICT more abstract corresponds with low design competence (opposite for figural)

7 TODO ARGUMENTS

7.1 TODO look for examples of inconsistent relationships between TRUST and BEAUTY

7.2 TODO look for examples of inconsistent relationships between TRUST and DATA COMPETENCE

7.3 TODO look for patterns of reversal between CHART LIKE and CHART BEAUTY

8 STASH

wip code stash

8.1 lessR donuts

##good for seeing the color schemes 
# #### DEFINE SET 
# stimulus  = "B2-1"
# df <- df_graphs %>% filter(STIMULUS == stimulus)
# 
# #### GENERATE GRAPHS
# 
#   #MAKER_ID-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "reds",
#          main = paste0(stimulus, " MAKER ID")) + theme_minimal()
# 
# 
# #MAKER_GENDER-DONUT
# PieChart(MAKER_GENDER, data = df,
#        fill = "blues",
#        main = paste0(stimulus, " MAKER GENDER")) + theme_minimal()
# 
# 
# #MAKER_AGE-DONUT
# PieChart(MAKER_AGE, data = df,
#        fill = "olives",
#        main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
# 
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "rusts",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "olives",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "greens",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "emeralds",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "turquoises",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "aquas",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-MAKER_ID
#   PieChart(MAKER_ID, data = df,
#          fill = "purples",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "magentas",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "violets",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#   
#   #MAKER_AGE-DONUT
#   PieChart(MAKER_ID, data = df,
#          fill = "grays",
#          main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
# "reds"    h   0
# "rusts"   h   30
# "browns"  h   60
# "olives"  h   90
# "greens"  h   120
# "emeralds"    h   150
# "turquoises"  h   180
# "aquas"   h   210
# "blues"   h   240
# "purples" h   270
# "violets" h   300
# "magentas"    h   330
# "grays"

8.2 ggplot donuts

#   df <- df_graphs %>% filter(STIMULUS== s)
# #### CATEGORICAL DONUT PLOTS
#   #subset data cols 
#   cols <- df %>% select( all_of(ref_cat_questions))
#   
#   ggplot( df, aes( x = STIMULUS, fill = MAKER_ID)) +
#   geom_bar( position = "stack", width=1) +
#   coord_radial(theta = "y", start = 0, inner.radius = 0.5, expand=FALSE) +
#   scale_fill_manual(values = my_palettes(name="reds", direction = "1"), name = "",  guide = guide_legend(reverse = FALSE)) +   
#   labs( title = paste0(s, " MAKER ID")) +
#   theme_minimal()
#   
#   

8.3 Alluvial Plots

## EXAMPLE ALLUVIAL PLOT USING GGALUVIAL  (instead of GGSANKEY)
# https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html

# #FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
# ds <- df_graphs %>% 
#   filter(str_detect(STIMULUS, "B2")) %>% 
#   select(STIMULUS, MAKER_ID, PID) %>% 
#   mutate(
#     MAKER_ID = fct_relevel(MAKER_ID, 
#               c("business","education","individual", "news","organization", "political" ))
#   )
# 
# ds %>% 
#   ggplot(aes( x = STIMULUS,
#               stratum = MAKER_ID,
#               label = MAKER_ID,
#               alluvium = PID)) +
#       stat_alluvium(aes(fill = MAKER_ID),
#                     width = 0,
#                     alpha = 1,
#                     geom = "flow")+
#       geom_stratum(width = 0.2, aes(fill= MAKER_ID))+
#       # geom_text(stat = "stratum", size = 5, angle = 90)+
#       scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE,
#                      alpha = 1) +
#       theme_minimal()